# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy
from itemloaders.processors import TakeFirst, MapCompose, Join
from scrapy.loader import ItemLoader


class ArticleItemLoader(ItemLoader):
    """Project-specific item loader.

    The class currently overrides nothing on ``ItemLoader``: the
    ``TakeFirst`` default output processor below is commented out, so the
    base loader's behaviour applies unchanged.
    """

    # Custom item loader hook: enable the line below to make every field
    # output only the first collected value.
    # default_output_processor = TakeFirst()


class ChinanewsItem(scrapy.Item):
    """Placeholder item; it declares no fields yet."""

    # Declare the fields for this item here, for example:
    # name = scrapy.Field()


def handle_addr(s):
    """Remove carriage returns, tabs and newlines from *s*.

    Returns the cleaned string, or ``None`` when only whitespace (or
    nothing) is left. Other whitespace, such as leading or trailing
    spaces, is kept as-is in the returned value.
    """
    # Drop all three control characters in a single pass.
    cleaned = s.translate({ord(ch): None for ch in "\r\t\n"})
    return cleaned if cleaned.strip() else None

def rmGif(s):
    """Reject values that end in ``gif`` and values that are blank.

    Returns *s* unchanged when it does not end with the lowercase suffix
    ``gif`` and contains at least one non-whitespace character; otherwise
    returns ``None``.
    """
    if s.endswith('gif'):
        return None
    if not s.strip():
        return None
    return s

class XinhuanetItem(scrapy.Item):
    """Item declaring title, content, url, images and image_path fields.

    Only ``title`` carries an input processor: its values are passed
    through ``handle_addr`` via ``MapCompose``. All other fields are
    declared without any processor.
    """

    # Disabled: default_output_processor = TakeFirst()

    title = scrapy.Field(input_processor=MapCompose(handle_addr))
    content = scrapy.Field()
    url = scrapy.Field()
    # Disabled input processor for images: input_processor=MapCompose(rmGif)
    images = scrapy.Field()
    image_path = scrapy.Field()

